{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CopyTransformer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A simple transformer that returns a copy of the input array, for example, as part of a scikit-learn pipeline."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> from mlxtend.preprocessing import CopyTransformer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Performing grid search...\n",
      "pipeline: ['vect', 'to_dense', 'clf']\n",
      "parameters:\n",
      "Fitting 2 folds for each of 9 candidates, totalling 18 fits\n",
      "Best score: 0.500\n",
      "Best parameters set:\n",
      "\tclf__max_features: 'sqrt'\n",
      "\tclf__n_estimators: 50\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    2.9s finished\n"
     ]
    }
   ],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from mlxtend.preprocessing import CopyTransformer\n",
    "import re\n",
    "import numpy as np\n",
    "\n",
    "X_train = np.array(['abc def ghi', 'this is a test',\n",
    "                    'this is a test', 'this is a test'])\n",
    "y_train = np.array([0, 0, 1, 1])\n",
    "\n",
    "pipe_1 = Pipeline([\n",
    "    ('vect', CountVectorizer()),\n",
    "    ('to_dense', CopyTransformer()),\n",
    "    ('clf', RandomForestClassifier())\n",
    "])\n",
    "\n",
    "parameters_1 = dict(\n",
    "    clf__n_estimators=[50, 100, 200],\n",
    "    clf__max_features=['sqrt', 'log2', None],)\n",
    "\n",
    "grid_search_1 = GridSearchCV(pipe_1, \n",
    "                             parameters_1, \n",
    "                             n_jobs=1, \n",
    "                             verbose=1,\n",
    "                             scoring='accuracy',\n",
    "                             cv=2)\n",
    "\n",
    "\n",
    "print(\"Performing grid search...\")\n",
    "print(\"pipeline:\", [name for name, _ in pipe_1.steps])\n",
    "print(\"parameters:\")\n",
    "grid_search_1.fit(X_train, y_train)\n",
    "print(\"Best score: %0.3f\" % grid_search_1.best_score_)\n",
    "print(\"Best parameters set:\")\n",
    "best_parameters_1 = grid_search_1.best_estimator_.get_params()\n",
    "for param_name in sorted(parameters_1.keys()):\n",
    "    print(\"\\t%s: %r\" % (param_name, best_parameters_1[param_name]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "## CopyTransformer\n",
      "\n",
      "*CopyTransformer()*\n",
      "\n",
      "Transformer that returns a copy of the input array\n",
      "\n",
      "For usage examples, please see\n",
      "[http://rasbt.github.io/mlxtend/user_guide/preprocessing/CopyTransformer/](http://rasbt.github.io/mlxtend/user_guide/preprocessing/CopyTransformer/)\n",
      "\n",
      "### Methods\n",
      "\n",
      "<hr>\n",
      "\n",
      "*fit(X, y=None)*\n",
      "\n",
      "Mock method. Does nothing.\n",
      "\n",
      "**Parameters**\n",
      "\n",
      "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
      "\n",
      "    Training vectors, where n_samples is the number of samples and\n",
      "    n_features is the number of features.\n",
      "\n",
      "- `y` : array-like, shape = [n_samples] (default: None)\n",
      "\n",
      "\n",
      "**Returns**\n",
      "\n",
      "self\n",
      "\n",
      "<hr>\n",
      "\n",
      "*fit_transform(X, y=None)*\n",
      "\n",
      "Return a copy of the input array.\n",
      "\n",
      "**Parameters**\n",
      "\n",
      "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
      "\n",
      "    Training vectors, where n_samples is the number of samples and\n",
      "    n_features is the number of features.\n",
      "\n",
      "- `y` : array-like, shape = [n_samples] (default: None)\n",
      "\n",
      "\n",
      "**Returns**\n",
      "\n",
      "- `X_copy` : copy of the input X array.\n",
      "\n",
      "\n",
      "<hr>\n",
      "\n",
      "*get_params(deep=True)*\n",
      "\n",
      "Get parameters for this estimator.\n",
      "\n",
      "**Parameters**\n",
      "\n",
      "- `deep` : boolean, optional\n",
      "\n",
      "    If True, will return the parameters for this estimator and\n",
      "    contained subobjects that are estimators.\n",
      "\n",
      "**Returns**\n",
      "\n",
      "- `params` : mapping of string to any\n",
      "\n",
      "    Parameter names mapped to their values.\n",
      "\n",
      "<hr>\n",
      "\n",
      "*set_params(**params)*\n",
      "\n",
      "Set the parameters of this estimator.\n",
      "\n",
      "The method works on simple estimators as well as on nested objects\n",
      "(such as pipelines). The latter have parameters of the form\n",
      "``<component>__<parameter>`` so that it's possible to update each\n",
      "component of a nested object.\n",
      "\n",
      "**Returns**\n",
      "\n",
      "self\n",
      "\n",
      "<hr>\n",
      "\n",
      "*transform(X, y=None)*\n",
      "\n",
      "Return a copy of the input array.\n",
      "\n",
      "**Parameters**\n",
      "\n",
      "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
      "\n",
      "    Training vectors, where n_samples is the number of samples and\n",
      "    n_features is the number of features.\n",
      "\n",
      "- `y` : array-like, shape = [n_samples] (default: None)\n",
      "\n",
      "\n",
      "**Returns**\n",
      "\n",
      "- `X_copy` : copy of the input X array.\n",
      "\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "with open('../../api_modules/mlxtend.preprocessing/CopyTransformer.md', 'r') as f:\n",
    "    print(f.read())"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
